import pandas as pd
import sqlite3
import plotly.express as px
conn = sqlite3.connect('database.db')
# We will exclude mail-in depot locations as we cannot guarantee same-day repairs through the mail.
with open('same_day_completion.sql', 'r') as f:
sql = f.read()
df = pd.read_sql(sql,conn,parse_dates=['repair_date','repair_start_time', 'repair_complete_time'])
df['rework_90_day'] = df['rework_90_day'].astype(bool)
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 63193 entries, 0 to 63192 Data columns (total 19 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 work_order 63193 non-null int64 1 store_id 63193 non-null int64 2 repair_date 63193 non-null datetime64[ns] 3 warranty 63193 non-null int64 4 warranty_status 63193 non-null object 5 device_generation 63193 non-null object 6 product_description 63193 non-null object 7 customer_reported_code 50856 non-null object 8 triage_failure_code 39209 non-null object 9 triage_failure_description 39209 non-null object 10 asp_repair_code 32216 non-null object 11 asp_repair_description 32216 non-null object 12 rma_country 63193 non-null object 13 repair_start_time 63193 non-null datetime64[ns] 14 repair_complete_time 63193 non-null datetime64[ns] 15 repair_location 63193 non-null object 16 technician_id 63193 non-null int64 17 rework_90_day 63193 non-null bool 18 sales 63193 non-null float64 dtypes: bool(1), datetime64[ns](3), float64(1), int64(4), object(10) memory usage: 8.7+ MB
df['same_day_completion'] = (df['repair_complete_time'].dt.date == df['repair_start_time'].dt.date)
sales_same_day_completion = df.groupby(['same_day_completion']).agg({'sales': 'mean'}).reset_index()
store_same_day_completion = df.groupby(['store_id']).agg({'sales': 'sum', 'same_day_completion': 'mean','rework_90_day': 'mean', 'work_order':'count'})
tech_same_day_completion = df.groupby(['technician_id']).agg({'sales': 'sum', 'same_day_completion': 'mean','rework_90_day': 'mean', 'work_order':'count'})
van_store_same_day_completion = df.groupby(['repair_location']).agg({'sales': 'mean', 'same_day_completion': 'mean','rework_90_day': 'mean', 'work_order':'count'}).reset_index()
van_same_day_completion = df[df['repair_location'].str.contains('VAN')].groupby(['same_day_completion']).agg({'sales': 'mean'}).reset_index()
sales_same_day_completion
| same_day_completion | sales | |
|---|---|---|
| 0 | False | 78.631911 |
| 1 | True | 113.920280 |
sales_sdc_figure = px.bar(sales_same_day_completion,
x='same_day_completion',
y='sales',
title='Average of Sales when Repair is Completed the Same Day',
labels={'same_day_completion':'Completed Same Day','sales':'Average of Sales'},
text_auto='$.2f')
sales_sdc_figure.update_traces(hovertemplate='Completed Same Day: %{x}<br>'+
'Average of Sales: $%{y:,.2f}')
sales_sdc_figure.show()
van_same_day_completion
| same_day_completion | sales | |
|---|---|---|
| 0 | False | 93.135714 |
| 1 | True | 152.934787 |
van_sdc_figure = px.bar(van_same_day_completion,
x='same_day_completion',
y='sales',
title='Average of Sales when Repair is Completed the Same Day on Vans',
labels={'same_day_completion':'Completed Same Day','sales':'Average of Sales'},
text_auto='$.2f')
van_sdc_figure.update_traces(hovertemplate='Completed Same Day: %{x}<br>'+
'Average of Sales: $%{y:,.2f}')
van_sdc_figure.show()
van_store_same_day_completion
| repair_location | sales | same_day_completion | rework_90_day | work_order | |
|---|---|---|---|---|---|
| 0 | STORE | 104.382204 | 0.747295 | 0.034665 | 62195 |
| 1 | VAN | 152.095922 | 0.985972 | 0.006012 | 998 |
van_store_sdc_figure = px.bar(van_store_same_day_completion,
x='repair_location',
y='same_day_completion',
title='Same Day Completion on Vans versus in Stores',
labels={'same_day_completion':'Completed Same Day','repair_location':'Repair Location'},
text_auto=':.2%',
hover_data={'same_day_completion':':.2%'},
)
van_store_sdc_figure.update_yaxes(tickformat=".2%")
van_store_sdc_figure.show()
van_store_sales_figure = px.bar(van_store_same_day_completion,
x='repair_location',
y='sales',
title='Average of Sales for Stores versus Vans',
labels={'repair_location':'Repair Location','sales':'Average of Sales'},
text_auto='$.2f')
van_store_sales_figure.update_traces(hovertemplate='Repair Location: %{x}<br>'+
'Average of Sales: $%{y:,.2f}')
van_store_sales_figure.show()
store_same_day_completion.corr()
| sales | same_day_completion | rework_90_day | work_order | |
|---|---|---|---|---|
| sales | 1.000000 | 0.018494 | 0.070364 | 0.948159 |
| same_day_completion | 0.018494 | 1.000000 | -0.162836 | -0.030355 |
| rework_90_day | 0.070364 | -0.162836 | 1.000000 | 0.109489 |
| work_order | 0.948159 | -0.030355 | 0.109489 | 1.000000 |
tech_same_day_completion.corr()
| sales | same_day_completion | rework_90_day | work_order | |
|---|---|---|---|---|
| sales | 1.000000 | 0.044568 | 0.007596 | 0.961633 |
| same_day_completion | 0.044568 | 1.000000 | -0.062159 | 0.013962 |
| rework_90_day | 0.007596 | -0.062159 | 1.000000 | 0.033130 |
| work_order | 0.961633 | 0.013962 | 0.033130 | 1.000000 |
store_sales_wo_figure = px.scatter(store_same_day_completion, x='work_order',
y='sales',
title='Total Sales and Number of Work Orders by Store ID',
trendline = 'ols',
trendline_color_override = 'red',
labels={'sales':'Total Sales','work_order':'Work Orders'})
store_sales_wo_figure.update_traces(text=store_same_day_completion.index, hovertemplate='Store ID: %{text}<br>'+
'Work Orders: %{x}<br>'+
'Sales: $%{y:,.2f}')
store_sales_wo_figure.update_layout(yaxis=dict(tickformat='$,'))
store_sales_wo_figure.show()
tech_sales_wo_figure = px.scatter(tech_same_day_completion, x='work_order',
y='sales',
title='Total Sales and Number of Work Orders by Tech ID',
trendline = 'ols',
trendline_color_override = 'red',
labels={'sales':'Total Sales','work_order':'Work Orders'})
tech_sales_wo_figure.update_traces(text=tech_same_day_completion.index, hovertemplate='Tech ID: %{text}<br>'+
'Work Orders: %{x}<br>'+
'Sales: $%{y:,.2f}')
tech_sales_wo_figure.update_layout(yaxis=dict(tickformat='$,'))
tech_sales_wo_figure.show()
store_same_day_completion.describe()
| sales | same_day_completion | rework_90_day | work_order | |
|---|---|---|---|---|
| count | 729.000000 | 729.000000 | 729.000000 | 729.000000 |
| mean | 9113.639095 | 0.754935 | 0.031348 | 86.684499 |
| std | 7553.646189 | 0.151624 | 0.031109 | 73.001726 |
| min | 0.000000 | 0.000000 | 0.000000 | 1.000000 |
| 25% | 4167.340000 | 0.666667 | 0.004184 | 40.000000 |
| 50% | 7644.070000 | 0.774194 | 0.027624 | 72.000000 |
| 75% | 11887.370000 | 0.849138 | 0.047393 | 115.000000 |
| max | 67764.510000 | 1.000000 | 0.333333 | 651.000000 |
# Save quartile numbers to variable
lowest_quartile_sdc = store_same_day_completion['same_day_completion'].quantile(.25)
highest_quartile_wo = store_same_day_completion['work_order'].quantile(.75)
# Query the dataframe find those stores with low same day completion and high volume
trainers = store_same_day_completion.query('(same_day_completion <= @lowest_quartile_sdc) & (work_order >= @highest_quartile_wo)').copy()
trainers_fig = px.scatter(trainers, x='same_day_completion',
y='work_order',
title='Same Day Completion Rate and Number of Work Orders by Store ID',
labels={'same_day_completion':'Same Day Completion Rate',
'work_order':'Number of Work Orders'},
size='work_order',
text=trainers.index)
# Format the data to show on hover
trainers_fig.update_traces(text=trainers.index, hovertemplate='Store ID: %{text}<br>'+
'Same Day Completion Rate: %{x:,.2f}<br>'+
'Work Orders: %{y}<br>')
trainers_fig.show()
rework_sales = df.groupby(['rework_90_day']).agg({'sales': 'mean'}).reset_index()
rework_sales
| rework_90_day | sales | |
|---|---|---|
| 0 | False | 108.573414 |
| 1 | True | 8.093830 |
sales_rework_figure = px.bar(rework_sales,
x='rework_90_day',
y='sales',
title='Average of Sales when Repair is Reworked within 90 Days',
labels={'rework_90_day':'Reworked','sales':'Average of Sales'},
text_auto='$.2f')
sales_rework_figure.update_traces(hovertemplate='Reworked: %{x}<br>'+
'Average of Sales: $%{y:,.2f}')
sales_rework_figure.show()
df['turnaround_time'] = pd.to_timedelta(df['repair_complete_time'] - df['repair_start_time'], unit='hour')
df.head()
| work_order | store_id | repair_date | warranty | warranty_status | device_generation | product_description | customer_reported_code | triage_failure_code | triage_failure_description | ... | asp_repair_description | rma_country | repair_start_time | repair_complete_time | repair_location | technician_id | rework_90_day | sales | same_day_completion | turnaround_time | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 12984423 | 539 | 2021-08-10 | 1 | EW | Gen 3 | Smartphone 3 | None | None | None | ... | None | US | 2021-08-07 10:00:00 | 2021-08-10 08:14:00 | STORE | 11991 | False | 253.64 | False | 2 days 22:14:00 |
| 1 | 12742816 | 539 | 2021-08-10 | 1 | EW | Gen 4 | Smartphone 4 XL | None | None | None | ... | None | US | 2021-07-15 12:02:00 | 2021-08-10 08:12:00 | STORE | 11991 | False | 265.25 | False | 25 days 20:10:00 |
| 2 | 13008982 | 746 | 2021-08-09 | 1 | EW | Gen 3 | Smartphone 3 XL | None | None | None | ... | None | CA | 2021-08-09 12:57:00 | 2021-08-09 16:39:00 | STORE | 10681 | False | 178.86 | True | 0 days 03:42:00 |
| 3 | 13009475 | 554 | 2021-08-09 | 1 | OOW | Gen 3a | Smartphone 3a | Physical Damage | T028 | Display dead pixel, dark spots or foreign mate... | ... | Replace Display | US | 2021-08-09 15:16:00 | 2021-08-09 18:04:00 | STORE | 11745 | False | 109.99 | True | 0 days 02:48:00 |
| 4 | 12878951 | 554 | 2021-08-09 | 1 | OOW | Gen 1 | Smartphone | Power | None | None | ... | None | US | 2021-07-28 10:46:00 | 2021-08-09 12:03:00 | STORE | 11975 | False | 0.00 | False | 12 days 01:17:00 |
5 rows × 21 columns
tech_stats = df.groupby(['technician_id']).agg({'work_order': 'count',
'rework_90_day': 'mean',
'turnaround_time': 'mean'})
tech_stats['quality_repairs'] = (tech_stats['work_order'] * (1 - tech_stats['rework_90_day'])).astype(int)
tech_stats.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 3607 entries, 32 to 16642 Data columns (total 4 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 work_order 3607 non-null int64 1 rework_90_day 3607 non-null float64 2 turnaround_time 3607 non-null timedelta64[ns] 3 quality_repairs 3607 non-null int64 dtypes: float64(1), int64(2), timedelta64[ns](1) memory usage: 140.9 KB
tech_stack_rank = tech_stats.query('work_order >= 10').copy()
tech_stack_rank.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 1858 entries, 32 to 16484 Data columns (total 4 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 work_order 1858 non-null int64 1 rework_90_day 1858 non-null float64 2 turnaround_time 1858 non-null timedelta64[ns] 3 quality_repairs 1858 non-null int64 dtypes: float64(1), int64(2), timedelta64[ns](1) memory usage: 72.6 KB
tech_stack_rank['wo_rank'] = tech_stack_rank['work_order'].rank(ascending=False)
tech_stack_rank['tat_rank'] = tech_stack_rank['turnaround_time'].rank(ascending=True)
tech_stack_rank['tech_score'] = (((2 * tech_stack_rank['wo_rank']) + tech_stack_rank['tat_rank']) / 3) / ((1 - tech_stack_rank['rework_90_day']) ** 10)
tech_stack_rank = tech_stack_rank.sort_values('tech_score', ascending=True)
tech_stack_rank.head()
| work_order | rework_90_day | turnaround_time | quality_repairs | wo_rank | tat_rank | tech_score | |
|---|---|---|---|---|---|---|---|
| technician_id | |||||||
| 1956 | 116 | 0.008621 | 0 days 00:36:09.827586206 | 115 | 35.0 | 5.0 | 27.260982 |
| 9725 | 110 | 0.009091 | 0 days 01:10:39.272727272 | 109 | 43.0 | 14.0 | 36.520828 |
| 15425 | 151 | 0.019868 | 0 days 03:28:42.119205298 | 148 | 18.0 | 136.0 | 70.074420 |
| 3311 | 79 | 0.012658 | 0 days 01:22:53.164556962 | 78 | 96.5 | 20.0 | 80.646075 |
| 923 | 67 | 0.000000 | 0 days 00:00:00.895522388 | 67 | 133.5 | 1.0 | 89.333333 |
c = conn.cursor()
best_tech = c.execute("select first_name || ' ' || technician.last_name from technician where technician_id = 1956")
best_tech = c.fetchone()[0]
print(best_tech)
Mark Jacobs
conn.close()